# object naming conventions
# F.* - functions
# ALL CAPS - temp variables, can delete
# temp.* - temp variables, can delete

#~~~~~~~~~~~~~~~~~
 library(twang)
 library(rpart)
 library(ipred)
 library(randomForest)
#~~~~~~~~~~~~~~~~~

# function: GBM estimation of propensity score / this is optimized for ATT estimation
    F.gbm.sim <- function(dataset) {
        start.time <- proc.time()[3]
        ps.model <- ps(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10,
                    data=dataset, title="none", stop.method=stop.methods["ks.stat.mean"],
                    plots="none", pdf.plots=F, n.trees=20000, interaction.depth=4,
                    shrinkage=0.0005, verbose=FALSE)

        # CPU processing time
        elapsed <- proc.time()[3]-start.time
        cat("Time elapsed: ", elapsed, "seconds","\n")
   
        # saves propensity scores
        PSCORES <<- ps.model$ps[, 1]
        
        # returns gbm model
        return(ps.model)
    }

# function: GBM estimation of propensity score optimized for either ATT or ATE weights
    F.gbm2.sim <- function(dataset, weight.type) {
        start.time <- proc.time()[3]
        ps.model <- gbm(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10,
                    data=dataset, distribution="bernoulli",
                    n.trees=20000, interaction.depth=4,
                    shrinkage=0.0005, verbose=FALSE)
          
        # find the number of iterations of the gbm algorithm that minimizes asam         
        opt <- optimize(F.asam.iter,            # optimize asam 
                        interval=c(100, 20000), # range in which to search 
                        tol=1,                  # get within one iteration 
                        ps.model=ps.model,      # the gbm model
                        x=dataset[1:10],        # data dropping y, z, w (if there) 
                        z=dataset$z.a,          # the treatment assignment indicator
                        weight.type=weight.type)# either "ATT" or "ATE" 
        
        # store the best number of iterations 
        best.asam.iter <- opt$minimum
        
        odds <- exp(predict(ps.model, dataset, best.asam.iter))             
   
        # saves propensity scores
        PSCORES <<- odds/(1+odds)

        # CPU processing time
        elapsed <- proc.time()[3]-start.time
        cat("Time elapsed: ", elapsed, "seconds","\n", "asam: ", opt$objective)
    }

# function computes the ASAM for the gbm model after "i" iterations 
# x is a data frame with only the covariates 
# z is a vector of 0s and 1s indicating treatment assignment 
# weight.type is "ATT" or "ATE"
    F.asam.iter <- function(i,ps.model,x,z,weight.type) { 
        i <- floor(i) # makes sure that i is an integer  
        
        # predict(ps.model, x, i) provides predicted values on the log-odds of 
        #  treatment for the gbm model with i iterations at the values of x 
        odds <- exp(predict(ps.model, x, i))
        pscores <- odds/(1+odds)
        
        # create temp weights
        w <- rep(1, nrow(x))
            
        if (weight.type=="ATE") {
                w <- ifelse(z==1, 1/pscores, 1/(1-pscores))
        } else
        
        {
                w <- ifelse(z==1, 1, pscores/(1-pscores))
        } 
        
        # sapply repeats calculation of F.std.diff for each variable (column) of x 
        # this calculates ASAM -- the mean of the  
        # standardized differences for all variables in x 
        asam <- mean(unlist(sapply(x, F.std.diff, z=z, w=w)))
        #cat(i,asam,"\n") 
        return(asam) 
    }  
 
# function: logistic regression estimation of propensity score

    F.lrg.sim <- function(dataset) {
        ps.model <- glm(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=dataset, family=binomial(link="logit"))
        odds <- exp(predict(ps.model))
            
        # saves propensity scores
        PSCORES <<- odds/(1+odds)
    }

# function: CART estimation of propensity scores
    
    F.cart.sim <- function(dataset) {
        ps.model <- rpart(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=dataset, method="class", model=F, x=F, y=F)
        
        # saves propensity scores
        PSCORES <<- predict(ps.model)[, 2] # probability that treatment==1        
    }

# function: pruned CART estimation of propensity scores
    F.prune.sim <- function(dataset) {
        ps.model1 <- rpart(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=dataset, method="class", model=F, x=F, y=F)

        # selects a tree size that minimizes the cross-validated error
        # the xerror column printed by printcp( ).
        ps.model2 <- prune(ps.model1, cp=ps.model1$cptable[which.min(ps.model1$cptable[,"xerror"]),"CP"])

        # saves propensity scores
        PSCORES <<- predict(ps.model2)[, 2] # probability that treatment==1                
    }
    
# function: bagged CART estimation of propensity scores
    F.bagc.sim <- function(dataset, boots) {
        ps.model <- bagging(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=dataset, coob=F, nbagg=boots, control=rpart.control(xval=0))

        # saves propensity scores
        PSCORES <<- predict(ps.model, newdata=dataset, type="prob", aggregation="average")[, 2] # probability that treatment==1
    }

# function: random forests estimation of propensity scores
    F.rfrst.sim <- function(dataset) {
        ps1 <- 1
        while (max(ps1)> .999) {
            gc(verbose=TRUE)
            ps.model <- randomForest(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=dataset)
            ps1 <- ps.model$votes[, 2]
        }

        # saves propensity scores
        PSCORES <<- ps1        
    }

# function: calculate ASAM for weighted data
# ASAM: the average standardized absolute mean difference in the covariates

    F.std.diff <- function(u,z,w) 
        { 
        # for variables other than unordered categorical variables compute mean differences 
        # mean(u[z==1]) gives the mean of u for the treatment group 
        # weighted.mean() is a function to calculate weighted mean 
        # u[z==0],w[z==0] select values of u and the weights for the comparison group 
        # weighted.mean(u[z==0],w[z==0],na.rm=TRUE): weighted mean for the comparison group 
        # sd(u[z==1], na.rm=T) calculates the standard deviation for the treatment group 
            
        if(!is.factor(u)) 
        { 
            sd1 <- sd(u[z==1], na.rm=T) 
            if(sd1 > 0) 
            { 
                result <- abs(mean(u[z==1],na.rm=TRUE)- 
                                weighted.mean(u[z==0],w[z==0],na.rm=TRUE))/sd1 
            } else 
            { 
                result <- 0 
                warning("Covariate with standard deviation 0.") 
            } 
        } 
        
        # for factors compute differences in percentages in each category 
        # for(u.level in levels(u) creates a loop that repeats for each level of  
        #  the categorical variable 
        # as.numeric(u==u.level) creates as 0-1 variable indicating u is equal to 
        #  u.level the current level of the for loop 
        # std.diff(as.numeric(u==u.level),z,w)) calculates the absolute  
        #   standardized difference of the indicator variable 
        else 
        { 
            result <- NULL 
            for(u.level in levels(u)) 
            { 
                result <- c(result, std.diff(as.numeric(u==u.level),z,w)) 
            } 
        } 
        return(result) 
      } 
